Get link for data:
from urllib.parse import urljoin
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Scrape the Michigan coronavirus data page and locate the download link
# whose href contains "by_Date". The result is exposed as `finallink`.
URL = "https://www.michigan.gov/coronavirus/0,9753,7-406-98163_98173---,00.html"
# Use a context manager so the HTTP response is always closed.
with urlopen(URL) as response:
    HTML = response.read().decode("utf-8")
# Only the part of the page between these two markers is parsed.
# str.find returns -1 when a marker is missing; fall back to the page
# boundaries instead of silently slicing from/to the last character.
start_index = HTML.find("shortdesc")
if start_index == -1:
    start_index = 0
end_index = HTML.find("footerArea")
if end_index == -1:
    end_index = len(HTML)
data = HTML[start_index:end_index]
soup = BeautifulSoup(data, features="html.parser")
# href=True skips anchors without an href attribute, which would otherwise
# yield None and break the substring test below.
links = [link.get('href') for link in soup.find_all('a', href=True)]
by_date_links = [href for href in links if "by_Date" in href]
if not by_date_links:
    raise RuntimeError('No link containing "by_Date" was found at ' + URL)
# urljoin resolves site-relative hrefs against the host and leaves
# already-absolute URLs untouched.
finallink = urljoin("https://michigan.gov", by_date_links[0])
Download data:
# Download the file behind the link found by the Python chunk
# (`py$finallink`) to a temporary path and read it with readxl.
temp <- tempfile()
# mode = "wb": an Excel workbook is a binary file; the default text mode
# can corrupt it on Windows.
download.file(py$finallink, destfile = temp, mode = "wb")
mi_data <- readxl::read_excel(temp)
Clean data:
# Fetch the data frame created in the R chunk through the reticulate `r` object.
mi_data = r.mi_data
# Latest value in the "Updated" column (Series.max skips missing values).
max_date = mi_data["Updated"].max()
# Sum the numeric columns per Date. numeric_only=True keeps text and
# datetime columns out of the aggregation; without it, newer pandas
# versions concatenate strings or raise on datetime columns.
agg_data = mi_data.groupby(["Date"], as_index=False).sum(numeric_only=True)
# Bring the per-date aggregate computed in Python back into R.
mi_cases_by_day <- py$agg_data
# Format the latest update timestamp as day, abbreviated month, year.
date_update <- format(py$max_date, "%d %b %Y")
# Preview the first six rows.
head(mi_cases_by_day, 6)
## Date Cases Deaths Cases.Cumulative Deaths.Cumulative
## 1 2020-02-29 19:00:00 14 0 14 0
## 2 2020-03-01 19:00:00 13 1 27 1
## 3 2020-03-02 19:00:00 22 0 49 1
## 4 2020-03-03 19:00:00 24 0 73 1
## 5 2020-03-04 19:00:00 26 0 99 1
## 6 2020-03-05 19:00:00 42 0 141 1
Initial Data Visualization:
# First look: Cases plotted against Date, with plotly's default trace settings.
plot_ly(data = mi_cases_by_day, x = ~Date, y = ~Cases)
With 7-day moving average and deaths:
# Add centered 7-day moving averages of daily cases and deaths.
# fill = NA: positions at either end that lack a full 7-day window are left
# missing instead of being set to 0, so the averaged series does not
# artificially drop to zero at the start and end of the date range.
mi_cases_by_day <- mi_cases_by_day %>%
  mutate(
    cases_ma = rollapply(Cases, 7, mean, align = "center", fill = NA),
    deaths_ma = rollapply(Deaths, 7, mean, align = "center", fill = NA)
  )
# Settings for a second y-axis (deaths): drawn on the right-hand side,
# overlaid on the primary y-axis, with red tick labels.
ay <- list(
  tickfont = list(color = "red"),
  overlaying = "y",
  side = "right",
  title = "Deaths"
)
# Daily cases and deaths as markers, each with its 7-day moving average as
# a line. Cases use the primary y-axis; deaths use the secondary axis "y2".
plot_ly(mi_cases_by_day, x = ~Date) %>%
  # Cases
  add_trace(
    y = ~Cases, alpha = .6, name = "Cases", type = "scatter",
    mode = "markers"
  ) %>%
  # Cases MA: add_lines() creates a line trace, so no `mode` is passed
  # (the previous mode = 'markers' contradicted it).
  add_lines(y = ~cases_ma, alpha = .8, name = "Cases MA") %>%
  # Deaths
  add_trace(
    name = "Deaths", yaxis = "y2", alpha = .15, y = ~Deaths, x = ~Date,
    color = I("red"), type = "scatter", mode = "markers"
  ) %>%
  # Deaths MA: line colour given as a plain colour string inside `line`.
  add_lines(
    name = "Deaths MA", yaxis = "y2", y = ~deaths_ma, x = ~Date,
    line = list(color = "red"), alpha = .8 / 4
  ) %>%
  layout(
    title = "Michigan COVID Cases/Deaths<br>With 7-day Moving Average",
    yaxis2 = ay,
    legend = list(x = 0.6, y = 0.9),
    # Extra right/top margin leaves room for the second axis and the title.
    margin = list(r = 50, t = 50)
  )